/****************************************************************************************************************
* Copyright � Mathematica Policy Research, Inc.
* This code cannot be copied, distributed or used without the express written permission
* of Mathematica Policy Research, Inc.
   Program	   				: 01_Random_Forest.sas
   Purpose     				: Runs a random forest model on the analytic file.
   Programmer  				: Claire Erba
   Researcher  				: Michael Levere
   Specificatn 				: N/A
   Project Name				: SSI Medicaid
   Project Number			: 055827
   Date Created				: 02/24/2023
   Runtime     				: [TBD]
   QA          				: Michael Levere
   Input files 				: Analytic_File datasets
   Output files				: Excel Output

   Modifications or Updates	: 02/24/2023 - New program.
							  03/17/2023 - CErba updated binary variables and continuous variables to have in the 
										   model using effect size logic. Added logic so that if the number of state  
										   benes > 1.5 million, then determine what percentage of a sample will get 
										   us to 1.5 million benes and limit to a random sample of that percent. Use 
										   80% otherwise. Created charts for benes the model was trained on and not
										   trained on (as well as for overall). Create mean tables for variables
										   where the bene is not on SSI but had > X% prob of being on SSI.
							  03/27/2023 - CErba updated to export graphs to an Excel file.
							  03/28/2023 - CErba removed urban/rural variable because there is an error in it that
										   ASPE is trying to resolve. Replaced with zcta5a urban/rural flags.
							  04/06/2023 - CErba added means for all non-SSI benes within a state. Added another
										   workbook that has means for those with income codes 01-05 or missing. 
										   Produced frequency of those with missing income codes for each state 
										   and probability of being on SSI.
							  04/20/2023 - CErba added demographics to the tables. Create additional table that
										   shows bene_cnty_cd, # bene not on SSI but > 40% prob of being on SSI, and
										   # benes on SSI. Run each state's model for the entire U.S. and create a
										   table for each state's model that is the same as the bene_cnty_cd table
										   but by state. Assume all benes are not on SSI for "bad states". Created 
										   option to not rerun random forest model so results do not change. Download 
										   the "Loss Reduction Variable Importance" table for each state.
							  06/13/2023 - CErba added code to export graph data.
							  06/30/2023 - CErba fixed mistake on what variables to include in random forest.
*********************************************************************************************************************/
*====================================================================*;        
* Set Up															 *;
*====================================================================*;
/* Options */
options mprint symbolgen center;
options ls=154 ps=63 formchar="�----�+�---+=�-/\<>*" msglevel=I;
options mergenoby=warn dkrocond=warn dkricond=warn compress=no;
options varinitchk=warn dtreset source;
options validvarname=v7;

/* Formats */
proc format;
	value mask_means
		99999999 = "*"
		999999999 = "1-10"
		other = [best.]
		;
	value flags 
		0 = 'No'
		1 = 'Yes'
		;
run;

%let run_dir = [ENTER PATH]/analytic_file_for_ssa;

/* Obtain starting time */
%let timer_start = %sysfunc(datetime());
data _null_;
	now = &timer_start.;
	put "Timer starting at " now dateampm.;
run;

/* main random forest macro */
%macro random_forest(year, rerun_hpf, rerun_only_overall_graph_data, rerun_tables);

	/* Set overall paths and libraries */
	x "mkdir -p &run_dir./_intermediate/Random_Forest__01/&year.";
	libname permwork "&run_dir./_intermediate/Random_Forest__01/&year.";
	options user = permwork minoperator; 
	x "mkdir -p &run_dir./_intermediate/Random_Forest__01/&year.";
	libname tmp "&run_dir./_intermediate/Random_Forest__01/&year.";

	/* Set up non state specific log/lst/html output files */
	%let datestamp = %sysfunc(date(),yymmddn8.);
	%let logfile = &run_dir./02_log_lst/Random_Forest/&year./01_Random_Forest_&datestamp..log;
	%let lstfile = &run_dir./02_log_lst/Random_Forest/&year./01_Random_Forest_&datestamp..lst;
	%let htmlfile = &run_dir./02_log_lst/Random_Forest/&year./01_Random_Forest_&datestamp..html;
	proc printto 
	    log="&logfile." 
	    print="&lstfile." new; 
	run;
	ods html body="&htmlfile." style=HTMLBlue;
	%let out_excel = &run_dir./03_data/Random_Forest/&year.;

	/* Set year specific libraries */
	libname analytic "&run_dir./03_data/Analytic_File/&year." access=readonly;

	/* Universal imports */
	%let lookups = &run_dir./00_lookup_tables;
	proc import datafile = "&lookups./zcta_ses_data.xlsx" out = zcta_ses_data
		dbms = xlsx
		replace;
	run;
 
	/* Determine what variables to drop from random forest model (because there are too many) */
	* calculate summary statistics on SSI and non-SSI benes;
	%do i = 0 %to 1;
		title "Proc Means of Variables for SSI Benes = &i., &year.";
		proc means data = analytic.analytic_file_&year. (drop = bene_id birth_dt death_dt da_run_id in_: year where = (ssi_id = &i.)) STACKODSOUTPUT;
			var _NUMERIC_;
			ods output summary = ssi_&i._means_&year.;
		run;
		data ssi_&i._means_&year.;
			set ssi_&i._means_&year.;
			if variable ne "ssi_id";
			drop label min max;
			rename variable = variable_&i. n = n_&i. mean = mean_&i. stddev = stddev_&i.;
		run;
	%end;
	* merge summary statistics for SSI and non-SSI benes;
	proc sql;
		create table ssi_01_means_&year. as
		select	a.*,
				b.*
		from ssi_0_means_&year. as a
		left join
		ssi_1_means_&year. as b
		on a.variable_0 = b.variable_1;	
	quit;
	* determine effect size;
	data ssi_01_means_&year.;
		set ssi_01_means_&year.;
		if lowcase(substr(variable_0, 1, 3)) in ("num", "min", "max", "tot", "avg") or index(lowcase(variable_0), "median") > 0 or variable_0 = "AGE_NUM" then do;
			binary_variable = 0;
			pooled_std = sqrt((((n_1-1)*(stddev_1**2))+((n_0-1)*(stddev_0**2)))/(n_1+n_0-2));
			if pooled_std ne 0 then effect_size = (mean_1-mean_0)/pooled_std;
		end;
		else do;
			binary_variable = 1;
			if mean_1/(1-mean_1) > 0 then do;
				part1 = log((mean_1/(1-mean_1))); 
			end;
			if mean_0/(1-mean_0) > 0 then do;
				part2 = -log((mean_0/(1-mean_0))); 
			end;
			if not missing(part1) or not missing(part2) then do;
				effect_size = sum(part1, part2)/1.65;
			end;
		end;
	run;

	* if mean_0 < 0.01 and mean_1 < 0.01 and effect_size < 2 for binary variables then drop those binary variables and their corresponding numeric variables;
	data ssi_01_means_drop_&year.;
		set ssi_01_means_&year.;
		if binary_variable = 1 then do;
			if mean_0 < 0.01 and mean_1 < 0.01 and effect_size < 2 then do;
				bin_variable_drop = variable_0;
				if length(bin_variable_drop) >= 9 and substr(bin_variable_drop, 1, 9) = "any_clms_" then check_for = substr(bin_variable_drop, 10);
				else check_for = bin_variable_drop;
			end;
		end;
	run;
	proc sql noprint;
		select distinct check_for into :cf_vars separated by " "
		from ssi_01_means_drop_&year.;
	quit;
	data ssi_01_means_drop2_&year.;
		set ssi_01_means_drop_&year.;
		%do i = 1 %to %sysfunc(countw(&cf_vars.));
			%let cf_var = %scan(&cf_vars., &i.);
			if index(variable_0, "&cf_var.") > 0 then cutvar = 1;
		%end;
	run;
	*put variables to drop, binary/interval variables to keep in macros;
	title "Variables to Drop, &year.";
	proc sql;
		select distinct variable_0 into :drop_vars
		from ssi_01_means_drop2_&year.
		where cutvar = 1;
	quit;
	title "Binary Variables to Keep, &year.";
	proc sql;
		select distinct variable_0 into :binary_vars_initial separated by " "
		from ssi_01_means_drop2_&year.
		where missing(cutvar) and binary_variable = 1;
	quit;
	title "Interval Variables to Keep, &year.";
	proc sql;
		select distinct variable_0 into :inter_vars_initial separated by " "
		from ssi_01_means_drop2_&year.
		where missing(cutvar) and binary_variable = 0;
	quit;
	proc datasets lib = permwork nolist;
		delete ssi_:;
	run;

	/* Setup overall macros */
	%let encode_vars =	INCM_CD SEX_CD PROGRAM_TYPE BENEFIT_PACKAGE DUAL_STATUS RACE_ETHNICITY;
	%let binary_vars =  &binary_vars_initial. rural INCM_CD_: SEX_CD_: PROGRAM_TYPE_: BENEFIT_PACKAGE_: DS_: RACE_ETHNICITY_:;
	%let inter_vars  = 	&inter_vars_initial. education9 education12 white_collar unemployed owner_occupied crowded_household below_poverty below_150poverty single_parent vehicle telephone 
						plumbing median_mortgage median_rent median_value median_income income_disparity;
	%let cat_vars 	 = 	rucc_2013;
	%let table_vars  = 	any_clms_sptc_grp_019 any_clms_sptc_grp_028 any_clms_sptc_class_034 any_clms_sptc_class_130 any_clms_sptc_class_149 any_clms_sptc_class_163 any_clms_sptc_class_171 
					  	any_clms_sptc_class_221 any_clms_sptyc_26 any_clms_sptyc_32 any_clms_sptyc_42 any_clms_tos_002 num_clms_tos_012 any_clms_tos_012 any_clms_tos_032 num_clms_tos_033 
					  	any_clms_tos_033 any_clms_tos_043 any_clms_tos_053 any_clms_ndc_ml_code_004 any_clms_ndc_ml_code_009 any_clms_ndc_ml_code_034 any_clms_ndc_ml_code_036 BENE_CERPAL 
					  	BENE_EPILEP BENE_INTDIS BENE_LEADIS BENE_OTHDEPL cc_adhd oth_mh_flag age_num sex_cd_f sex_cd_m sex_cd_miss incm_cd_01 incm_cd_02 incm_cd_03 incm_cd_04 incm_cd_05 
					  	incm_cd_06 incm_cd_07 incm_cd_08 incm_cd_miss race_ethnicity_aian race_ethnicity_asian race_ethnicity_black race_ethnicity_hawaiian_pi race_ethnicity_hispanic 
					  	race_ethnicity_white race_ethnicity_multi race_ethnicity_unknown race_ethnicity_miss;

	/* One hot encoding of categorical variables to make binary; only keep variables that are of interest to the model */
	data tmp.analytic_file_&year.;
		set analytic.analytic_file_&year. (keep = msis_id submtg_state_cd ssi_id bene_zip_cd bene_cnty_cd &encode_vars. &binary_vars_initial. &inter_vars_initial.);
		/* update race */
		if RACE_ETHNICITY = "American Indian and Alaska Native, non-Hispanic" then RACE_ETHNICITY = "AIAN";
		if RACE_ETHNICITY = "Asian, non-Hispanic" then RACE_ETHNICITY = "Asian";
		if RACE_ETHNICITY = "Black, non-Hispanic" then RACE_ETHNICITY = "Black";
		if RACE_ETHNICITY = "Hawaiian/Pacific Islander" then RACE_ETHNICITY = "Hawaiian_PI";
		if RACE_ETHNICITY = "Hispanic, all races" then RACE_ETHNICITY = "Hispanic";
		if RACE_ETHNICITY = "White, non-Hispanic" then RACE_ETHNICITY = "White";
		if RACE_ETHNICITY = "Multiracial, non-Hispanic" then RACE_ETHNICITY = "Multi";
	run;
	%macro hot_encoding(data, var);
		proc sql noprint;
			select distinct &var. into :val1-
			from &data.
			where not missing(&var.);

			select count(distinct(&var.)) into :len 
			from &data.
			where not missing(&var.);
		quit;
		data &data.;
			set &data.;
			%do i=1 %to &len.;
				%if &var. ne DUAL_STATUS %then %do;
					if &var. = "&&&val&i" then &var._%sysfunc(compress(&&&val&i,'$ -/,')) = 1 ;
					else &var._%sysfunc(compress(&&&val&i,'$ -/,')) = 0;
				%end;
				%else %if &var. = DUAL_STATUS %then %do;
					if &var. = "&&&val&i" then DS_%sysfunc(compress(&&&val&i,'$ -/,')) = 1 ;
					else DS_%sysfunc(compress(&&&val&i,'$ -/,')) = 0;
				%end;
			%end;
			%if &var. ne DUAL_STATUS %then %do;
				&var._miss = (missing(&var.));
			%end;
			%else %if &var. = DUAL_STATUS %then %do;
				DS_miss = (missing(&var.));
			%end;
		run;
	%mend hot_encoding;
	%do j = 1 %to %sysfunc(countw(&encode_vars.));
		%let v = %scan(&encode_vars., &j.);
		%hot_encoding(data=tmp.analytic_file_&year., var=&v.);
	%end;

	/* Merge on zip code level data */
	data tmp.analytic_file_&year.;
		set tmp.analytic_file_&year.;
		if length(bene_zip_cd) >= 5 then bene_zip_cd_5 = substr(bene_zip_cd, 1, 5);
	run;
	proc sql;
		create table tmp.analytic_file_&year._2 (drop = missing_zip_var) as
		select  a.*,
			    b.*,
				case
					when missing(a.bene_zip_cd_5) then 1
					else 0
				end as bene_zip_missing,
			    case
			   		when missing(b.zcta5a) or b.missing_zip_var = 1 then 1
					else 0
				end as missing_atleast1_zipvar,
			    case
			   		when missing(b.zcta5a) then "ONLY IN ANALYTIC DATA"
					else "MERGED"
				end as _merge
		from tmp.analytic_file_&year. as a
		left join 
		zcta_ses_data as b
		on a.bene_zip_cd_5 = b.zcta5a;
	quit;
	proc datasets lib = tmp nolist;
		delete analytic_file_&year.;
	run;
	title "All U.S.";
	title2 "Number of Unmerged Zip Codes and Indication of If Missing At Least One Zip Code Variable from Left Join of Analytic File with ZCTA_SES_DATA";
	proc freq data = tmp.analytic_file_&year._2;
		table _merge * bene_zip_missing * missing_atleast1_zipvar / list missing;
	run;
	title2 "Unique Unmerged Zip Codes from Left Join of Analytic File with ZCTA_SES_DATA";
	proc freq data = tmp.analytic_file_&year._2 (where = (_merge = "ONLY IN ANALYTIC DATA")) order = freq;
		table bene_zip_cd_5 * submtg_state_cd / list missing;
	run;

	/* Stop log/lst */
	proc printto;
	run;
	ods html close;

	/*******
	Create numeric list of tasks for pulling data via gridding
	*******/
	%let icnt = 1;
	%let states  = 01 04 05 06 08 09 10 12 15 16 18 20 22 23 24 25 26 27 28 30 32 35 36 37 39 42 46 48 53 54 55 56;
	%let states2 = AL AZ AR CA CO CT DE FL HI ID IN KS LA ME MD MA MI MN MS MT NV NM NY NC OH PA SD TX WA WV WI WY;
	%let states3 = "01", "04", "05", "06", "08", "09", "10", "12", "15", "16", "18", "20", "22", "23", "24", "25", "26", 
				   "27", "28", "30", "32", "35", "36", "37", "39", "42", "46", "48", "53", "54", "55", "56";

	/* Total number of states */
	%let totcnt = %sysfunc(countw(&states.));

	/* Set number of tasks that will be gridded and compiled - here it will be equal to the totcnt (see above)*/
	%let task_compile = ;

	%do %until(&icnt > &totcnt);
		%let task_compile = &task_compile task&icnt;
		%let icnt = %eval(&icnt + 1);
	%end;

	%put totcnt="***********Total number of gridding tasks - &totcnt.***********";
	%put "***********List of gridding tasks - &task_compile.***********";

	/******* 
	Grid Code: Grid Credentials
	*******/
	OPTIONS 
		METAUSER="&OUSER"
		METAPASS="&OPASSP"
	;

	/******* 
	Grid code: Enable Grid
	*******/
	%LET CHG_QUEUE=QUEUE=normal; /*"normal" must be lower case*/
	%LET RC=%SYSFUNC(GRDSVC_ENABLE(_ALL_, RESOURCE=sasCCW%str(;) JOBOPTS=CHG_QUEUE));

	/******* 
	Grid it
	*******/
	%do s = 1 %to &totcnt.;

		/* Grid Code: Sign on to remote session for a task */
		SIGNON TASK&s.;

		/* Copy necessary macro variables to the remote server session */
		%syslput _all_;

		/* Grid Code: Start remote submission for a task */
		RSUBMIT TASK&s.
		CONNECTWAIT=NO /* Run tasks asynchronously */
		CONNECTPERSIST=NO /* Task end (i.e. sign off when complete) */
		;

			/* Libraries */
			options center;
			libname permwork "&run_dir./_intermediate/Random_Forest__01/&year.";
			options user = permwork minoperator;
			libname tmp "&run_dir./_intermediate/Random_Forest__01/&year."; 
			libname analytic "&run_dir./03_data/Analytic_File/&year.";
			libname out "&run_dir./03_data/Random_Forest/&year.";

			/* Read in analytic file the state */
			%macro run_state;

				/* Set up log/lst/html output files for each state */
				%let out_excel = &run_dir./03_data/Random_Forest/&year.;
				%let state = %scan(&states., &s.);
				%let state2 = %scan(&states2., &s.);
				%let datestamp = %sysfunc(date(),yymmddn8.);
				%let logfile = &run_dir./02_log_lst/Random_Forest/&year./01_Random_Forest_&state2._&datestamp..log;
				%let lstfile = &run_dir./02_log_lst/Random_Forest/&year./01_Random_Forest_&state2._&datestamp..lst;
				%let htmlfile = &run_dir./02_log_lst/Random_Forest/&year./01_Random_Forest_&state2._&datestamp..html;
				proc printto 
				    log="&logfile." 
				    print="&lstfile." new; 
				run;
				ods html body="&htmlfile." style=HTMLBlue;

				/* Formats */
				proc format;
					value $ f_elig_grp
						'01_PREG'   = 'Pregnant'
						'02_CHILD'  = 'Children'	
						'03_ADULT'  = 'Adult'
						'04_DISBLD' = 'Persons with disabilities'
						'05_AGED'   = 'Aged'
						'06_EXPNSN' = 'Adult expansion'
						'07_COVID'  = 'COVID newly eligible'
						''          = 'Unknown'
						;
					value flags 
						0 = 'No'
						1 = 'Yes'
						;
					value mask_means
						99999999 = "*"
						999999999 = "1-10"
						other = [best.]
						;
					value $ miss
						' ' = 'Missing'
						other = 'Non-Missing'
						;
				run;

				/* Limit to state */
				data analytic_&year._&state2.;
					set tmp.analytic_file_&year._2 (/*obs=10000*/ where = (submtg_state_cd = "&state."));
					/* label variables */
					label 
						any_clms_sptc_grp_019 = "Any Claims With srvc_prvdr_txnmy_cd_grp = Agencies"
						any_clms_sptc_grp_028 =	"Any Claims With srvc_prvdr_txnmy_cd_grp = Suppliers"
						any_clms_sptc_class_034	= "Any Claims With srvc_prvdr_txnmy_cd_class = Psychiatry & Neurology"
						any_clms_sptc_class_130	= "Any Claims With srvc_prvdr_txnmy_cd_class = Occupational Therapist"
						any_clms_sptc_class_149	= "Any Claims With srvc_prvdr_txnmy_cd_class = Speech-Language Pathologist"
						any_clms_sptc_class_163	= "Any Claims With srvc_prvdr_txnmy_cd_class = Community/Behavioral Health"
						any_clms_sptc_class_171 = "Any Claims With srvc_prvdr_txnmy_cd_class = Local Education Agency (LEA)"
						any_clms_sptc_class_221	= "Any Claims With srvc_prvdr_txnmy_cd_class = Durable Medical Equipment & Medical Supplies"
						any_clms_sptyc_26 = "Any Claims With srvc_prvdr_type_cd = Psychologist, Clinical"
						any_clms_sptyc_32 = "Any Claims With srvc_prvdr_type_cd = Clinic or Group Practice"
						any_clms_sptyc_42 = "Any Claims With srvc_prvdr_type_cd = Hospital-General"
						any_clms_tos_002 = "Any Claims With tos_cd = Outpatient hospital services"
						num_clms_tos_012 = "Number of Claims With tos_cd = Physicians' services"
						any_clms_tos_012 = "Any Claims With tos_cd = Physicians' services"
						any_clms_tos_032 = "Any Claims With tos_cd = Speech, hearing, and language disorders services (when not provided under home health services)"
						num_clms_tos_033 = "Number of Claims With tos_cd = Prescribed drugs"
						any_clms_tos_033 = "Any Claims With tos_cd = Prescribed drugs"
						any_clms_tos_043 = "Any Claims With tos_cd = Rehabilitative services"
						any_clms_tos_053 = "Any Claims With tos_cd = Targeted case management services"
						any_clms_ndc_ml_code_004 = "Any Claims With ndc_ml_code = ADHD Medications"
						any_clms_ndc_ml_code_009 = "Any Claims With ndc_ml_code = Antipsychotic Medications"
						any_clms_ndc_ml_code_034 = "Any Claims With ndc_ml_code = Oral Antipsychotic Medications"
						any_clms_ndc_ml_code_036 = "Any Claims With ndc_ml_code = Potentially Harmful Drugs�Rate 1 and Rate 2 Medications"
						BENE_CERPAL = "CCW Cerebral Palsy�beneficiary flag"
						BENE_EPILEP	= "CCW Epilepsy�beneficiary flag"
						BENE_INTDIS = "CCW Intellectual Disabilities and Related Conditions�beneficiary flag"
						BENE_LEADIS	= "CCW Learning Disabilities�beneficiary flag"
						BENE_OTHDEPL = "CCW Other Developmental Delays beneficiary flag"
						cc_adhd = "Attention deficit hyperactivity disorder beneficiary flag"
						oth_mh_flag = "Other mental health conditions category beneficiary flag"
						SEX_CD_F = "Female"
						SEX_CD_M = "Male"
						SEX_CD_Miss = "Sex Missing"
						INCM_CD_01 = "Individual's State-defined family income is from 0 to 100% of the federal poverty level (FPL)"
						INCM_CD_02 = "Individual's State-defined family income is from 101 to 133% of the FPL"
						INCM_CD_03 = "Individual's State-defined family income is from 134 to 150% of the FPL"
						INCM_CD_04 = "Individual's State-defined family income is from 151 to 200% of the FPL"
						INCM_CD_05 = "Individual's State-defined family income is from 201 to 255% of the FPL"
						INCM_CD_06 = "Individual's State-defined family income is from 256 to 300% of the FPL"
						INCM_CD_07 = "Individual's State-defined family income is from 301 to 400% of the FPL"
						INCM_CD_08 = "Individual's State-defined family income is over 400% of the FPL"
						INCM_CD_miss = "Income Missing"
						RACE_ETHNICITY_AIAN = "American Indian and Alaska Native, non-Hispanic"
						RACE_ETHNICITY_Asian = "Asian, non-Hispanic"
						RACE_ETHNICITY_Black = "Black, non-Hispanic"
						RACE_ETHNICITY_Hawaiian_PI = "Hawaiian/Pacific Islander"
						RACE_ETHNICITY_Hispanic = "Hispanic, all races"
						RACE_ETHNICITY_White = "White, non-Hispanic"
						RACE_ETHNICITY_Multi = "Multiracial, non-Hispanic"
						RACE_ETHNICITY_Unknown = "Race/Ethnicity Unknown"
						RACE_ETHNICITY_Miss = "Race/Ethnicity Missing"
						AGE_NUM = "Age"
						;
				run;

				/* Determine if there is a variation in the ssi_id variable in the given state (i.e., there are 1s and 0s) */
				/* Determine number of benes in each state */
				proc sql noprint;
					select count(distinct ssi_id), count(*) into :ssi_diff, :num_state_benes
					from analytic_&year._&state2.
					where not missing(ssi_id);
				quit;

				%if %sysevalf(&ssi_diff. > 1) %then %do;
					title "&state2., &year.";

					/* Run random forest on state's training benes */
					%if %sysevalf(&rerun_hpf. = 1) %then %do;
						%macro run_hp;

							%let threshold = 1500000;
							%if %sysevalf(&num_state_benes.*(8/10) > &threshold.) %then %let pct_samp = %sysfunc(round(&threshold./&num_state_benes.,0.1));
							%else %let pct_samp = 0.8;
							%if &year. = 2017 and "&state2." = "CA" %then %let pct_samp = 0.25; /* needed for CA 2017 */
							%put Num Benes: &num_state_benes.;
							%put Threshold: &threshold.;
							%put Pct Samp: &pct_samp.;
							title2 "Proc Survey Select for Train Data";
							proc surveyselect data = analytic_&year._&state2.
								out = rs_analytic_&year._&state2.
								method = srs
								samprate = &pct_samp.
								seed = 123;
							run;
							title2 "HP Forest Results";
							proc hpforest data = rs_analytic_&year._&state2. maxtrees = 150;
								id msis_id submtg_state_cd;
								target ssi_id / level = binary;
								input &binary_vars. / level = binary;
								input &inter_vars. / level = interval;
								input &cat_vars. / level = nominal;
								score out = rf_train_&year._&state2.;
								ods output VariableImportance = loss_reduction_importance_&state2.;
								save file = "&run_dir./03_data/Random_Forest/&year./Models/&state2._&year._Model.sav";
							run;
							proc datasets lib = permwork nolist;
								delete rs_analytic_&year._&state2.;
							run;

						%mend run_hp;
						%run_hp;
					%end;

					/* Run the state's random forest model on all of state's benes */
					proc hp4score data = analytic_&year._&state2.;
						id msis_id submtg_state_cd ssi_id bene_cnty_cd incm_cd &binary_vars. &inter_vars. &cat_vars.;
						score file = "&run_dir./03_data/Random_Forest/&year./Models/&state2._&year._Model.sav" out = rf_&year._&state2.;
					run;

					/* Create additional variables that will be used for graphs and tables */
					/* Merge on the training data to identify which benes were in the training data if we re-ran the HP Forest model */
					%if %sysevalf(&rerun_hpf. = 1) %then %do;
						proc sort data = rf_train_&year._&state2. (keep = msis_id submtg_state_cd);
							by msis_id submtg_state_cd;
						run;
						proc sort data = rf_&year._&state2.;
							by msis_id submtg_state_cd;
						run;
					%end;
					data rf2_&year._&state2.;

						%if %sysevalf(&rerun_hpf. = 1) %then %do;
							merge rf_&year._&state2. (in=a) rf_train_&year._&state2. (in=b);
							by msis_id submtg_state_cd;
							if a and b then trained = 1;
							if a and not b then trained = 0;
							if b and not a then trained = .;
						%end;
						%else %do;
							set rf_&year._&state2.;
						%end;

						tot_bene_count = 1;

						length p_ssi_id1_range $7.;
						if p_ssi_id1 < 0.05 then p_ssi_id1_range = " 0-5%";
						if p_ssi_id1 >= 0.05 and p_ssi_id1 < 0.1 then p_ssi_id1_range = " 5-10%";
						if p_ssi_id1 >= 0.1 and p_ssi_id1 < 0.15 then p_ssi_id1_range = "10-15%";
						if p_ssi_id1 >= 0.15 and p_ssi_id1 < 0.2 then p_ssi_id1_range = "15-20%";
						if p_ssi_id1 >= 0.2 and p_ssi_id1 < 0.25 then p_ssi_id1_range = "20-25%";
						if p_ssi_id1 >= 0.25 and p_ssi_id1 < 0.3 then p_ssi_id1_range = "25-30%";
						if p_ssi_id1 >= 0.3 and p_ssi_id1 < 0.35 then p_ssi_id1_range = "30-35%";
						if p_ssi_id1 >= 0.35 and p_ssi_id1 < 0.4 then p_ssi_id1_range = "35-40%";
						if p_ssi_id1 >= 0.4 and p_ssi_id1 < 0.45 then p_ssi_id1_range = "40-45%";
						if p_ssi_id1 >= 0.45 and p_ssi_id1 < 0.5 then p_ssi_id1_range = "45-50%";
						if p_ssi_id1 >= 0.5 and p_ssi_id1 < 0.55 then p_ssi_id1_range = "50-55%";
						if p_ssi_id1 >= 0.55 and p_ssi_id1 < 0.6 then p_ssi_id1_range = "55-60%";
						if p_ssi_id1 >= 0.6 and p_ssi_id1 < 0.65 then p_ssi_id1_range = "60-65%";
						if p_ssi_id1 >= 0.65 and p_ssi_id1 < 0.7 then p_ssi_id1_range = "65-70%";
						if p_ssi_id1 >= 0.7 and p_ssi_id1 < 0.75 then p_ssi_id1_range = "70-75%";
						if p_ssi_id1 >= 0.75 and p_ssi_id1 < 0.8 then p_ssi_id1_range = "75-80%";
						if p_ssi_id1 >= 0.8 and p_ssi_id1 < 0.85 then p_ssi_id1_range = "80-85%";
						if p_ssi_id1 >= 0.85 and p_ssi_id1 < 0.9 then p_ssi_id1_range = "85-90%";
						if p_ssi_id1 >= 0.9 and p_ssi_id1 < 0.95 then p_ssi_id1_range = "90-95%";
						if p_ssi_id1 >= 0.95 then p_ssi_id1_range = "95-100%";

						non_ssi_gt40pct_prob_ssi = (p_ssi_id1 > 0.4 and ssi_id = 0);

						format ssi_id flags.;
					run;
					proc datasets lib = permwork nolist;
						delete analytic_&year._&state2. %if %sysevalf(&rerun_hpf. = 1) %then %do; rf_train_&year._&state2. %end; rf_&year._&state2.;
					run;
					%if %sysevalf(&rerun_hpf. = 1) %then %do;
						title2 "Number of Benes The Model Was Trained On";
						proc freq data = rf2_&year._&state2.;
							table trained / list missing;
						run;
					%end;

					/* Table prep */
					%global obs_50 obs_45 obs_40 obs_35 obs_30 obs_25 obs_20 obs_15 obs_10 obs_ssi obs_non_ssi;

					/* Summary of benes not on SSI with > X% prob of being on SSI */
					%let incm_cd_restrict = incm_cd in ("01", "02", "03", "04", "05") or incm_cd_miss = 1;
					%macro create_tables(rincm);

						* create individual prob tables;
						%macro pop_table(table, prob, prob_stmt, ssi, title, mean_var);
							proc sql noprint;
								select count(*) into :obs_&table.
								from rf2_&year._&state2.
								where ssi_id = &ssi. 
									  %if &prob_stmt. = 1 %then %do; and p_ssi_id1 > 0.&prob. %end;
									  %if &rincm. = 1 and &table. ne ssi and &table. ne non_ssi %then %do; and (&incm_cd_restrict.) %end;
								;
							quit;
							%if &&obs_&table.. > 0 %then %do;
								title2 "Proc Means of Variables for &title.";
								%if &rincm. = 1 %then %do; title3 "Restricted to Benes with Income in the 01-05 or Missing Categories"; %end;
								proc means data = rf2_&year._&state2. (where = (ssi_id = &ssi. 
									  											%if &prob_stmt. = 1 %then %do; and p_ssi_id1 > 0.&prob. %end;
									  											%if &rincm. = 1 and &table. ne ssi and &table. ne non_ssi %then %do; and (&incm_cd_restrict.) %end;)) STACKODSOUTPUT;
									var &table_vars.;
									ods output summary = rf2_&year._&state2._&table.;
								run;
								proc sql noprint;
									select max(n) into :num_benes
									from rf2_&year._&state2._&table.;
								quit;
								%if &rincm. = 1 %then %do;
									proc sql noprint;
										select count(*) into :num_benes_missincm
										from rf2_&year._&state2.
										where ssi_id = &ssi. and incm_cd_miss = 1
									 	 	  %if &prob_stmt. = 1 %then %do; and p_ssi_id1 > 0.&prob. %end;
										;
									quit;
								%end;
								proc sql;
									insert into rf2_&year._&state2._&table.(variable, mean)
									values('_n', &num_benes.)
									%if &rincm. = 1 %then %do;
										values('_n_miss_incm_cd', &num_benes_missincm.)
									%end;
									;
								quit;
								data rf2_&year._&state2._&table.;
									set rf2_&year._&state2._&table. (rename = (mean = &mean_var.));
									if variable ne "_n" and variable ne "_n_miss_incm_cd" and (1 <= &num_benes. * &mean_var. <= 10) and 
									   lowcase(substr(variable, 1, 3)) not in ("num", "min", "max", "tot", "avg") and index(lowcase(variable), "median") = 0 then &mean_var. = 99999999;
									if variable in ("_n", "_n_miss_incm_cd") and 1 <= &mean_var. <= 10 then &mean_var. = 999999999;
									if variable = "_n" then label = "Number of benes included in means";
									if variable = "_n_miss_incm_cd" then label = "Number of benes included in means with missing income information";
									keep variable label &mean_var.;
									format &mean_var. mask_means.;
								run;
							%end;
						%mend pop_table;
						%pop_table(table=50, prob=5, prob_stmt=1, ssi=0, title=%str(Non-SSI Benes With > 50% of Being on SSI), mean_var=mean_nonssi_gt50pct_prob_ssi);
						%pop_table(table=45, prob=45, prob_stmt=1, ssi=0, title=%str(Non-SSI Benes With > 45% of Being on SSI), mean_var=mean_nonssi_gt45pct_prob_ssi);
						%pop_table(table=40, prob=4, prob_stmt=1, ssi=0, title=%str(Non-SSI Benes With > 40% of Being on SSI), mean_var=mean_nonssi_gt40pct_prob_ssi);
						%pop_table(table=35, prob=35, prob_stmt=1, ssi=0, title=%str(Non-SSI Benes With > 35% of Being on SSI), mean_var=mean_nonssi_gt35pct_prob_ssi);
						%pop_table(table=30, prob=3, prob_stmt=1, ssi=0, title=%str(Non-SSI Benes With > 30% of Being on SSI), mean_var=mean_nonssi_gt30pct_prob_ssi);
						%pop_table(table=25, prob=25, prob_stmt=1, ssi=0, title=%str(Non-SSI Benes With > 25% of Being on SSI), mean_var=mean_nonssi_gt25pct_prob_ssi);
						%pop_table(table=20, prob=2, prob_stmt=1, ssi=0, title=%str(Non-SSI Benes With > 20% of Being on SSI), mean_var=mean_nonssi_gt20pct_prob_ssi);
						%pop_table(table=15, prob=15, prob_stmt=1, ssi=0, title=%str(Non-SSI Benes With > 15% of Being on SSI), mean_var=mean_nonssi_gt15pct_prob_ssi);
						%pop_table(table=10, prob=1, prob_stmt=1, ssi=0, title=%str(Non-SSI Benes With > 10% of Being on SSI), mean_var=mean_nonssi_gt10pct_prob_ssi);
						%pop_table(table=ssi, prob=, prob_stmt=0, ssi=1, title=%str(All SSI Benes), mean_var=mean_ssi);
						%pop_table(table=non_ssi, prob=, prob_stmt=0, ssi=0, title=%str(All Non-SSI Benes), mean_var=mean_non_ssi);

						*merge individual tables;
						%if &obs_10. > 0 %then %do;
							proc sql;
								create table rf2_&year._&state2._prob_all_&rincm. as
								select 	 a.*
										,b.mean_non_ssi
										%if &obs_10. > 0 %then %do;
											,c.mean_nonssi_gt10pct_prob_ssi
										%end;
										%if &obs_15. > 0 %then %do;
											,d.mean_nonssi_gt15pct_prob_ssi
										%end;
										%if &obs_20. > 0 %then %do;
											,e.mean_nonssi_gt20pct_prob_ssi
										%end;
										%if &obs_25. > 0 %then %do;
											,f.mean_nonssi_gt25pct_prob_ssi
										%end;
										%if &obs_30. > 0 %then %do;
											,g.mean_nonssi_gt30pct_prob_ssi
										%end;
										%if &obs_35. > 0 %then %do;
											,h.mean_nonssi_gt35pct_prob_ssi
										%end;
										%if &obs_40. > 0 %then %do;
											,i.mean_nonssi_gt40pct_prob_ssi
										%end;
										%if &obs_45. > 0 %then %do;
											,j.mean_nonssi_gt45pct_prob_ssi
										%end;
										%if &obs_50. > 0 %then %do;
											,k.mean_nonssi_gt50pct_prob_ssi
										%end;
								from rf2_&year._&state2._ssi as a
								left join rf2_&year._&state2._non_ssi as b on a.variable = b.variable
								%if &obs_10. > 0 %then %do;
									left join rf2_&year._&state2._10 as c on a.variable = c.variable
								%end;
								%if &obs_15. > 0 %then %do;
									left join rf2_&year._&state2._15 as d on a.variable = d.variable
								%end;
								%if &obs_20. > 0 %then %do;
									left join rf2_&year._&state2._20 as e on a.variable = e.variable
								%end;
								%if &obs_25. > 0 %then %do;
									left join rf2_&year._&state2._25 as f on a.variable = f.variable
								%end;
								%if &obs_30. > 0 %then %do;
									left join rf2_&year._&state2._30 as g on a.variable = g.variable
								%end;
								%if &obs_35. > 0 %then %do;
									left join rf2_&year._&state2._35 as h on a.variable = h.variable
								%end;
								%if &obs_40. > 0 %then %do;
									left join rf2_&year._&state2._40 as i on a.variable = i.variable
								%end;
								%if &obs_45. > 0 %then %do;
									left join rf2_&year._&state2._45 as j on a.variable = j.variable
								%end;
								%if &obs_50. > 0 %then %do;
									left join rf2_&year._&state2._50 as k on a.variable = k.variable
								%end;
								order by variable;
							quit;
						%end;

					%mend create_tables;
					%if %sysevalf(&rerun_tables. = 1) %then %do;
						/* Create main tables */
						%create_tables(rincm=0);
						%create_tables(rincm=1);

						/* Run the state's random forest model on all of the U.S. */
						proc hp4score data = tmp.analytic_file_&year._2;
							id msis_id submtg_state_cd ssi_id;
							score file = "&run_dir./03_data/Random_Forest/&year./Models/&state2._&year._Model.sav" out = rf_&year._&state2._allus;
						run;
						data rf_&year._&state2._allus;
							set rf_&year._&state2._allus;
							if submtg_state_cd not in (&states3.) then ssi_id = 0;
							non_ssi_gt40pct_prob_ssi = (p_ssi_id1 > 0.4 and ssi_id = 0);
						run;
					%end;

				%end;

				%else %do;

					%put There is no variation in the SSI Indicator in State &state2. and therefore a random forest cannot be generated;

					/* Delete dataset */
					proc datasets lib = permwork nolist;
						delete analytic_&year._&state2.;
					run;

					ods html close;

				%end;

				/* Stop log/lst */
				proc printto;
				run;
				ods html close;

			%mend run_state;
			%run_state;

		/* Grid code: End remote submission for a task */
		ENDRSUBMIT;
		%put TASK&s. CODE RAN ON MACHINE %sysfunc(GRDSVC_GETNAME(TASK&s.));

	%end;

	/*******
	Grid code: Wait for all grid tasks to complete and sign off
	*******/
	WAITFOR _ALL_ &task_compile;
	SIGNOFF _ALL_;

	/* Create graphs for all states and export to a single Excel file */
	/* NOTE: only re-create if the model has changed */
	%if %sysevalf(&rerun_hpf. = 1) %then %do;
		%do s = 1 %to &totcnt.;
			%let state = %scan(&states., &s.);
			%let state2 = %scan(&states2., &s.);
			%if &s. = 1 %then %do;
				options device = actximg;
				ods excel file = "&out_excel./graphs_&year._&datestamp..xlsx" style=HTMLBlue options(embedded_titles = "yes" sheet_interval = "none" sheet_name = "&state2._&year.");
			%end;
			%else %do;
				ods excel options(embedded_titles = "yes" sheet_interval = "now" sheet_name = "&state2._&year.");
			%end;
			goptions reset = pattern;
			pattern1 color = "#E0D4B5";
			title "&state2., &year.";
			%macro prob_of_on_ssi_graphs(trained, title);

				/* Define graph ranges */
				%let y_axis_range = '0%' '10%' '20%' '30%' '40%' '50%' '60%' '70%' '80%' '90%' '100%';
				%let x_axis_range = '0-5%' '5-10%' '10-15%' '15-20%' '20-25%' '25-30%' '30-35%' '35-40%' '40-45%' '45-50%' '50-55%' '55-60%' '60-65%'  
									'65-70%' '70-75%' '75-80%' '80-85%' '85-90%' '90-95%' '95-100%';
				%let midpoints = 0.025 0.075 0.125 0.175 0.225 0.275 0.325 0.375 0.425 0.475 0.525 0.575 0.625 0.675 0.725 0.775 0.825 0.875 0.925 0.975;

				/* Prob of being on SSI when on SSI */
				goptions reset = pattern;
				pattern1 color = "#E0D4B5";
				axis1 label = (f = "Montserrat/Bold" "Probability of Being on SSI") value = (&x_axis_range. h = 8pt a = 90);
				axis2 label = (a = 90 f = "Montserrat/Bold" "Percent") order = (0 to 100 by 10) value = (&y_axis_range.) minor = (n = 5);
				title2 "Distribution of the Probability That a Bene is on SSI";
				title3 "For Benes on SSI"; 
				title4 &title.;
				proc gchart data = rf2_&year._&state2. (where = (ssi_id = 1 &trained.));
					vbar p_ssi_id1 /
						midpoints = &midpoints. type = PCT outside = PCT
						raxis = axis2 maxis = axis1 coutline = black woutline = 1;
				run; quit; 

				/* Prob of being on SSI whe not on SSI */
				goptions reset = pattern;
				pattern1 color = "#046B5C";
				title3 "For Benes Not on SSI"; 
				title4 &title.;
				proc gchart data = rf2_&year._&state2. (where = (ssi_id = 0 &trained.));
					vbar p_ssi_id1 /
						midpoints = &midpoints. type = PCT outside = PCT
						raxis = axis2 maxis = axis1 coutline = black woutline = 1;
				run; quit; 

				/* Percentage of benes on SSI for a range of prob of being on SSI */
				goptions reset = pattern;
				pattern1 color = "#046B5C";
				pattern2 color = "#E0D4B5";
				axis1 label = (f = "Montserrat/Bold" "") value = (h = 8pt a = 90);
				axis2 label = (a = 90 f = "Montserrat/Bold" "Percent") order = (0 to 100 by 10) value = ('0%' '10%' '20%' '30%' '40%' '50%' '60%' '70%' '80%' '90%' '100%') minor = (n = 5);
				axis3 label = none value = none;
				title2 "Percent of Benes on SSI for Different Probabilities of Being on SSI";
				title3 &title.;							
				legend1 across = 2 label = ("On SSI" position = (top center) height = 1) position = (bottom outside center) frame;
				proc gchart data = rf2_&year._&state2. (where = (ssi_id in (0,1) &trained.));
					vbar tot_bene_count /
						type = PCT subgroup = ssi_id g100 group = p_ssi_id1_range
						gaxis = axis1 maxis = axis3 raxis = axis2 coutline = black woutline = 1 legend = legend1;
				run; quit; 

			%mend prob_of_on_ssi_graphs;
			%prob_of_on_ssi_graphs(trained=, title=%str("Full Sample"));
			%prob_of_on_ssi_graphs(trained=%str(and trained = 0), title=%str("Benes The Model Was Not Trained On"));
			%prob_of_on_ssi_graphs(trained=%str(and trained = 1), title=%str("Benes The Model Was Trained On"));

		%end;
		ods excel close;
	%end;

	/* Export graph data */
	%if %sysevalf(&rerun_hpf. = 1) %then %do;
		%do s = 1 %to &totcnt.;
			%let state = %scan(&states., &s.);
			%let state2 = %scan(&states2., &s.);
			%if &s. = 1 %then %do;
				options device = actximg;
				ods excel file = "&out_excel./graph_data_&year._&datestamp..xlsx" style=HTMLBlue options(embedded_titles = "yes" sheet_interval = "now" sheet_name = "&state2._&year.");
			%end;
			%else %do;
				ods excel options(embedded_titles = "yes" sheet_interval = "now" sheet_name = "&state2._&year.");
			%end;
			%macro create_graph_data(overall, trained, varname);
				proc sql;
					create table rf2_&year._&state2._gphdata_&trained. as
					select p_ssi_id1_range, ssi_id, count(*) as num_benes
					from rf2_&year._&state2. %if &overall. ne 1 %then %do; (where = (trained = &trained.)) %end;
					group by p_ssi_id1_range, ssi_id;
				quit;
				proc transpose data = rf2_&year._&state2._gphdata_&trained. out = rf2_&year._&state2._gphdata_&trained._t (drop =  _NAME_) prefix = num_benes_ssi_id;
					by p_ssi_id1_range;
					id ssi_id;
					var num_benes;
				run;
				data rf2_&year._&state2._gphdata_&trained._t;
					set rf2_&year._&state2._gphdata_&trained._t;
					&varname._pct_on_ssi = num_benes_ssi_idYes / sum(num_benes_ssi_idYes, num_benes_ssi_idNo);
					&varname._pct_not_on_ssi = num_benes_ssi_idNo / sum(num_benes_ssi_idYes, num_benes_ssi_idNo);
				run;
			%mend create_graph_data;
			%if %sysevalf(&rerun_only_overall_graph_data. = 1) %then %do;
				%create_graph_data(overall=1, trained=, varname=overall);
				data rf2_&year._&state2._gphdata_all;
					set rf2_&year._&state2._gphdata__t;
				run;
			%end;
			%else %do;
				%create_graph_data(overall=1, trained=, varname=overall);
				%create_graph_data(overall=0, trained=1, varname=trained);
				%create_graph_data(overall=0, trained=0, varname=not_trained);
				proc sql;
					create table rf2_&year._&state2._gphdata_all as
					select 	a.p_ssi_id1_range,
							a.overall_pct_on_ssi,
							a.overall_pct_not_on_ssi,
							b.trained_pct_on_ssi, 
							b.trained_pct_not_on_ssi,
							c.not_trained_pct_on_ssi, 
							c.not_trained_pct_not_on_ssi
					from rf2_&year._&state2._gphdata__t as a
					left join rf2_&year._&state2._gphdata_1_t as b on a.p_ssi_id1_range = b.p_ssi_id1_range
					left join rf2_&year._&state2._gphdata_0_t as c on a.p_ssi_id1_range = c.p_ssi_id1_range;
				quit;
			%end;
			proc print data = rf2_&year._&state2._gphdata_all;
			run;
		%end;
		ods excel close;
	%end;

	/* Export all state tables that show the mean value of variables for different probs of being on SSI to a single Excel file */
	%if %sysevalf(&rerun_tables. = 1) %then %do;
		%macro export_tables(file, rincm);
			%do s = 1 %to &totcnt.;
				%let state = %scan(&states., &s.);
				%let state2 = %scan(&states2., &s.);
				%if &s. = 1 %then %do;
					options device = actximg;
					ods excel file = "&out_excel./tables&file._&year._&datestamp..xlsx" options(embedded_titles = 'yes' sheet_interval = 'now' sheet_name = "&state2._&year.");
				%end;
				%else %do;
					ods excel options(embedded_titles = 'yes' sheet_interval = 'now' sheet_name = "&state2._&year.");
				%end;
				title "&state2., &year.";
				title2 "Means of Variables for Non-SSI Benes With Varying Probability of Being on SSI";
				proc print data = rf2_&year._&state2._prob_all_&rincm.;
				run;
				proc datasets lib = permwork nolist;
					delete rf2_&year._&state2._prob_all_&rincm.;
				run;
			%end;
			ods excel close;
		%mend export_tables;
		%if %sysevalf(&rerun_tables. = 1) %then %do;
			%export_tables(file=, rincm=0);
			%export_tables(file=_incm_restrict, rincm=1);
		%end;
	%end;

	/* Create and export tables that show distribution of benes not on SSI but with a > 40% prob of being on SSI by county */
	%if %sysevalf(&rerun_tables. = 1) %then %do;
		%do s = 1 %to &totcnt.;
			%let state = %scan(&states., &s.);
			%let state2 = %scan(&states2., &s.);
			%if &s. = 1 %then %do;
				options device = actximg;
				ods excel file = "&out_excel./tables_nonssi_highprobssi_cntydist_&year._&datestamp..xlsx" options(embedded_titles = 'yes' sheet_interval = 'now' sheet_name = "&state2._&year.");
			%end;
			%else %do;
				ods excel options(embedded_titles = 'yes' sheet_interval = 'now' sheet_name = "&state2._&year.");
			%end;
			title "&state2., &year.";
			title2 "County Distribution of Number Benes Not on SSI With > 40% Probablity of Being on SSI and Number of Benes on SSI";
			proc means data = rf2_&year._&state2. nway missing noprint;
				class bene_cnty_cd;
				output out = rf2_&year._&state2._cntyfreq (drop = _TYPE_ _FREQ_) sum(ssi_id non_ssi_gt40pct_prob_ssi)=;
			run;
			data rf2_&year._&state2._cntyfreq;
				set rf2_&year._&state2._cntyfreq;
				if 1 <= ssi_id <= 10 then ssi_id = 999999999;
				if 1 <= non_ssi_gt40pct_prob_ssi <= 10 then non_ssi_gt40pct_prob_ssi = 999999999;
				format ssi_id non_ssi_gt40pct_prob_ssi mask_means.;
				rename ssi_id = ssi_benes
					   non_ssi_gt40pct_prob_ssi = non_ssi_benes_gt40pct_prob_ssi
				;
			run;
			proc print data = rf2_&year._&state2._cntyfreq;
			run;
		%end;
		ods excel close;
	%end;

	/* Create and export tables that show distribution of benes in the U.S. not on SSI but with a > 40% prob of being on SSI by state using each state's model */
	%if %sysevalf(&rerun_tables. = 1) %then %do;
		%do s = 1 %to &totcnt.;
			%let state = %scan(&states., &s.);
			%let state2 = %scan(&states2., &s.);
			%if &s. = 1 %then %do;
				options device = actximg;
				ods excel file = "&out_excel./tables_nonssi_highprobssi_stdist_&year._&datestamp..xlsx"  options(embedded_titles = 'yes' sheet_interval = 'now' sheet_name = "&state2._&year.");
			%end;
			%else %do;
				ods excel options(embedded_titles = 'yes' sheet_interval = 'now' sheet_name = "&state2._&year.");
			%end;
			title "All U.S., &year.";
			title2 "State Distribution of Number Benes Not on SSI With > 40% Probablity of Being on SSI and Number of Benes on SSI";
			title3 "Using &state2.'s Random Forest Model";
			proc means data = rf_&year._&state2._allus nway missing noprint;
				class submtg_state_cd;
				output out = rf2_&year._&state2._stfreq (drop = _TYPE_ _FREQ_) sum(ssi_id non_ssi_gt40pct_prob_ssi)=;
			run;
			data rf2_&year._&state2._stfreq;
				set rf2_&year._&state2._stfreq;
				if 1 <= ssi_id <= 10 then ssi_id = 999999999;
				if 1 <= non_ssi_gt40pct_prob_ssi <= 10 then non_ssi_gt40pct_prob_ssi = 999999999;
				format ssi_id non_ssi_gt40pct_prob_ssi mask_means.;
				rename ssi_id = ssi_benes
					   non_ssi_gt40pct_prob_ssi = non_ssi_benes_gt40pct_prob_ssi
				;
			run;
			proc print data = rf2_&year._&state2._stfreq;
			run;
			proc datasets lib = permwork nolist;
				delete rf2_&year._&state2.:;
			run;
		%end;
		ods excel close;
	%end;

	/* Export the Loss Reduction Variable Importance for each state's model */
	/* NOTE: only re-create if the model has changed */
	%if %sysevalf(&rerun_hpf. = 1) %then %do;
		%do s = 1 %to &totcnt.;
			%let state = %scan(&states., &s.);
			%let state2 = %scan(&states2., &s.);
			%if &s. = 1 %then %do;
				options device = actximg;
				ods excel file = "&out_excel./tables_loss_rdctn_imprtnc_&year._&datestamp..xlsx"  options(embedded_titles = 'yes' sheet_interval = 'now' sheet_name = "&state2._&year.");
			%end;
			%else %do;
				ods excel options(embedded_titles = 'yes' sheet_interval = 'now' sheet_name = "&state2._&year.");
			%end;
			title "&state2., &year.";
			title2 "Loss Reduction Importance Table From Random Forest Model";
			proc print data = loss_reduction_importance_&state2.;
			run;
			proc datasets lib = permwork nolist;
				delete loss_reduction_importance_&state2.;
			run;
		%end;
		ods excel close;
	%end;
	
	/* Clean environment */
	proc datasets lib = work kill nolist;
	run;
	proc datasets lib = permwork kill nolist;
	run;
	proc datasets lib = tmp kill nolist;
	run;

%mend random_forest;

*====================================================================*;        
* Calls														 		 *;
*====================================================================*;
%let yearinput = [ENTER YEAR];
%random_forest(year=&yearinput., rerun_hpf=1, rerun_only_overall_graph_data=0, rerun_tables=1);
